from pyspark import SparkConf, SparkContext


if __name__ == '__main__':
    # Build the SparkConf object
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    # Build the SparkContext, the entry point to the execution environment
    sc = SparkContext(conf=conf)

    rdd = sc.parallelize(["hadoop spark hdfs", "spark spark hdfs", "hdfs hdfs hadoop"])

    rdd = rdd.flatMap(lambda x: x.split(" "))
    """
    flatMap方法，解除数据容器的嵌套
    f: (T) -> Iterable[U],
    接收一个参数：任意类型
    返回一个参数：可迭代的任意类型
    """

    # collect() gathers the flattened words back to the driver:
    # ['hadoop', 'spark', 'hdfs', 'spark', 'spark', 'hdfs', 'hdfs', 'hdfs', 'hadoop']
    print(rdd.collect())
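
    # Release the SparkContext's resources when done (standard cleanup, not in the original).
    sc.stop()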